from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
import numpy as np
import os
def loadData(trainDataPath, testDataPath, trainLabelsPath, testLabelsPath):
    """Load train/test features and labels and min-max scale the features.

    The scaler is fitted on the training features only; the test features
    are transformed with those same fitted parameters so that both sets
    share one scale and no test-set statistics influence preprocessing.
    As a consequence, scaled test values may fall outside the range the
    training values were mapped to.

    Args:
        trainDataPath: path passed to ``np.load`` for the training features.
        testDataPath: path passed to ``np.load`` for the test features.
        trainLabelsPath: path passed to ``np.load`` for the training labels.
        testLabelsPath: path passed to ``np.load`` for the test labels.

    Returns:
        Tuple ``(normed_train, normed_test, trainLabels, testLabels)``;
        the labels are returned exactly as loaded.
    """
    trainData = np.load(trainDataPath)
    trainLabels = np.load(trainLabelsPath)
    testData = np.load(testDataPath)
    testLabels = np.load(testLabelsPath)

    minMaxScaler = preprocessing.MinMaxScaler()
    normed_train = minMaxScaler.fit_transform(trainData)
    # transform (not fit_transform): reuse the min/max learned from the
    # training data instead of re-fitting the scaler on the test set.
    normed_test = minMaxScaler.transform(testData)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Normalized!')
    return normed_train, normed_test, trainLabels, testLabels

def train():
    """Fit random forests of increasing size and report test performance.

    Loads the four ``.npy`` files from the ``feature`` directory that sits
    next to the current working directory (i.e. under its parent), then for
    each forest size from ``min_estimators`` up to, but not including,
    ``max_estimators`` refits the classifier from scratch on the training
    data and prints the size, the test accuracy and the confusion matrix.

    Returns:
        None. Results are only printed.
    """
    dataPath = os.getcwd()
    parent_path = os.path.dirname(dataPath)
    # Build the paths with os.path.join: in a plain string literal '\f' and
    # '\t' are the form-feed and tab escapes, so concatenating
    # '\feature\trainData.npy' produced a path that cannot exist.
    feature_dir = os.path.join(parent_path, 'feature')
    trainData, testData, trainLabels, testLabels = loadData(
        os.path.join(feature_dir, 'trainData.npy'),
        os.path.join(feature_dir, 'testData.npy'),
        os.path.join(feature_dir, 'trainLabels.npy'),
        os.path.join(feature_dir, 'testLabels.npy'),
    )
    # min_samples_split=2 is the smallest integer current scikit-learn
    # accepts; a node holding a single sample can never be split, so this
    # grows the same trees as the previous value of 1.
    rfc = RandomForestClassifier(max_features='sqrt', min_samples_split=2, n_jobs=-1)
    min_estimators = 5
    max_estimators = 200
    # NOTE(review): the upper bound is exclusive, so the largest forest
    # evaluated has max_estimators - 1 trees -- confirm this is intended.
    for n_estimators in range(min_estimators, max_estimators):
        rfc.set_params(n_estimators=n_estimators)
        predict_labels = rfc.fit(trainData, trainLabels).predict(testData)
        rfc_accuracy = rfc.score(testData, testLabels)

        confus = confusion_matrix(testLabels, predict_labels)
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print('{0} {1}'.format(n_estimators, rfc_accuracy))
        print(confus)